clear % Clear workspace variables
close all % Close all open figures
load dataforPub.mat % Load data from 'dataforPub.mat' file

%options
witGPS=1; %include GPS in benefits

if witGPS==0
    Benefit=BenefitWithoutGPS;
end

%start creating new variables
N=length(Type); % Total number of observations (entries) in 'Type'
rtN=sqrt(N); % Square root of total number of observations
Age=Age+5; % Increase each 'Age' value by 5
Timestamp=datetime(Timestamp); % Convert 'Timestamp' to datetime format
Timestamp(379)='01/30/2018 08:40:18'; % Correct a specific wrongly inserted date


%make codes for routes etc
for i=1:N
    puf=unicode2native(char(Assistant(i))); % Convert Assistant names to numeric codes
    assist(i)=sum(puf); % Sum of numeric codes for Assistant
    
    paf=unicode2native(char(Route(i))); % Convert Route names to numeric codes
    route(i)=sum(paf); % Sum of numeric codes for Route
    
    agis=string(Route(i)); % Convert Route to string for matching
    agisilaos=find(strcmp(agis,table2array(routestats(:,1)))); % Find index of Route in routestats table
    apostaseis(i)=table2array(routestats(agisilaos,2)); % Distance for the route
    hronoi(i)=table2array(routestats(agisilaos,3)); % Time for the route
end


% Transpose vectors for consistency
apostaseis=apostaseis';
hronoi=hronoi';
assist=assist';
route=route';

% Convert 'Type' and 'Route' to nominal variables for analysis
TypeName=nominal(Type);
Route=nominal(Route);

% Create unique session identifiers based on date and route
d=day(Timestamp);
m=month(Timestamp);
y=year(Timestamp);
puf=10*(d+m*31+y*365); % Unique code based on date
for i=1:N
    unqrt=unique(route(d==d(i) & m==m(i) & y==y(i))); % Unique routes on the same day
    seqinday=find(unqrt==route(i)); % Sequence of the route in the day
    session(i)=puf(i)+seqinday; % Session identifier
end


% Ensure unique session identifiers are sequential
sortedses=sort(session);
puf=unique(sortedses);
for i=1:length(puf)
   session(session==puf(i))=i; 
end
ses=session;
session=categorical(session'); % Convert session to categorical

% Check if sessions have the right number of observations
for i=1:max(ses)
    plup(i)=sum(ses==i);
end


% Adjust for double sessions in a day
pita=find(plup==8); % Find sessions with 8 observations (double sessions)
for i=1:length(pita)
    pitoula=ses(find(ses==pita(i)));
    pitoula(5:8)=max(ses)+i; % Assign a new session identifier to the second half
    session(find(ses==pita(i)))=categorical(pitoula);
end
ses=double(session)'; % Convert session back to numeric and transpose



%%%%% HERE THE ACTUAL ANALYSIS CAN START

% Re-calculate number of observations
N=length(Type);

%Table 1
disp('TABLE 1')
disp('Age and Sex, by type')
disp([mage;msex])
disp('number of observations by Type')
disp([sum(Type==1) sum(Type==2) sum(Type==3) sum(Type==4)])

disp('end table')

% Perform Jonkheere test for overall ranking
disp('Jonkheere test for overall ranking');
jttrend([RatingExper(isnan(Type)==0) Type(isnan(Type)==0)],[1 2 3 4]);

%Table 2
disp('TABLE 2')

% Display the mean 'Charged' values for each type
disp('mean Charged: Type 1 - 4')
rat1=Charged(Type==1); % Selects 'Charged' values where Type is 1
rat2=Charged(Type==2); % Selects 'Charged' values where Type is 2
rat3=Charged(Type==3); % Selects 'Charged' values where Type is 3
rat4=Charged(Type==4); % Selects 'Charged' values where Type is 4

disp([mean(rat1) mean(rat2) mean(rat3) mean(rat4)]) % Displays the mean 'Charged' values for Types 1-4

% Calculate and display the standard error of the mean for 'Charged' in each type
disp('SE of the mean')
flopo=rat1; % Temporarily stores 'Charged' values for Type 1
SEcharged(1)=std(flopo)/sqrt(length(flopo)); % Calculates SE for Type 1
flopo=rat2; % Temporarily stores 'Charged' values for Type 2
SEcharged(2)=std(flopo)/sqrt(length(flopo)); % Calculates SE for Type 2
flopo=rat3; % Temporarily stores 'Charged' values for Type 3
SEcharged(3)=std(flopo)/sqrt(length(flopo)); % Calculates SE for Type 3
flopo=rat4; % Temporarily stores 'Charged' values for Type 4
SEcharged(4)=std(flopo)/sqrt(length(flopo)); % Calculates SE for Type 4
disp(SEcharged) % Displays the SE values


% Perform and display the results of Wilcoxon rank sum tests between each pair of types for 'Charged'
disp('Wilcoxon test p-values matrix: Types 1-4 vs 1-4')
% The matrix shows the p-values from comparing distributions of 'Charged' between all pairs of types
% Note: The script includes self-comparisons (e.g., rat1 vs. rat1), which will always yield a p-value of 1 since it's the same group
disp([ranksum(rat1,rat1) ranksum(rat1,rat2) ranksum(rat1,rat3) ranksum(rat1,rat4) 
ranksum(rat2,rat1) ranksum(rat2,rat2) ranksum(rat2,rat3) ranksum(rat2,rat4) 
ranksum(rat3,rat1) ranksum(rat3,rat2) ranksum(rat3,rat3) ranksum(rat3,rat4) 
ranksum(rat4,rat1) ranksum(rat4,rat2) ranksum(rat4,rat3) ranksum(rat4,rat4)])


% Display the mean 'RatingExper' values for each type
disp('mean rating: Type 1 - 4')
% Increase every value in RatingExper by 1. This operation is done in-place, affecting the variable for subsequent calculations.
RatingExper=RatingExper+1;
% Select and store ratings by type, then display their means
rat1=RatingExper(Type==1); % Ratings for Type 1
rat2=RatingExper(Type==2); % Ratings for Type 2
rat3=RatingExper(Type==3); % Ratings for Type 3
rat4=RatingExper(Type==4); % Ratings for Type 4
disp([mean(rat1) mean(rat2) mean(rat3) mean(rat4)]) % Display mean ratings for each type

% Calculate and display the standard error of the mean for 'RatingExper' in each type
disp('SE of the mean rating')
% Calculate and display SE for each type's ratings
SErating(1)=std(rat1)/sqrt(length(rat1)); % SE for Type 1
SErating(2)=std(rat2)/sqrt(length(rat2)); % SE for Type 2
SErating(3)=std(rat3)/sqrt(length(rat3)); % SE for Type 3
SErating(4)=std(rat4)/sqrt(length(rat4)); % SE for Type 4
disp(SErating) % Display the SE for ratings of each type

% Perform and display the results of Wilcoxon rank sum tests between each pair of types for 'RatingExper'
disp('Wilcoxon test p-values matrix: Types 1-4 vs 1-4')
% The matrix shows the p-values from comparing distributions of 'RatingExper' between all pairs of types
% Note: The script includes self-comparisons (e.g., rat1 vs. rat1), which will always yield a p-value of 1 since it's the same group
disp([ranksum(rat1,rat1) ranksum(rat1,rat2) ranksum(rat1,rat3) ranksum(rat1,rat4) 
ranksum(rat2,rat1) ranksum(rat2,rat2) ranksum(rat2,rat3) ranksum(rat2,rat4) 
ranksum(rat3,rat1) ranksum(rat3,rat2) ranksum(rat3,rat3) ranksum(rat3,rat4) 
ranksum(rat4,rat1) ranksum(rat4,rat2) ranksum(rat4,rat3) ranksum(rat4,rat4)])

%Experience ratings, by rating and type
    disp('Experience ratings, by rating and type')
% Initialize loop to iterate through each type
for i=1:4
    numbers(i)=sum(Type==i); % Count the number of entries for each type and store it
    
    % Nested loop to iterate through rating values
    for j=0:4
        rat(j+1,i)=sum(Type==i & RatingExper==j+1); % Count how many of each rating value there are for each type
    end
    
    % Calculate and store the mean rating, age, and sex for each type
    mrat(i)=mean(RatingExper(Type==i)); % Average rating for each type
    mage(i)=mean(Age(Type==i)); % Average age for each type
    msex(i)=mean(Sex(Type==i)); % Average sex for each type
end

% Normalize the count of ratings per type to percentages
rat=rat./repmat(numbers,5,1)*100; % Divide each count by the total number of entries per type and multiply by 100 to get percentages

% Display the rating distributions as percentages, rounded to two decimal places
% The 'flipud' function is used to flip the matrix upside down before displaying it.
disp(round(flipud(rat),2))


% Display the mean 'Foul' values for each type
disp('mean fouls by type')
% Select and store fouls by type
rat1=Foul(Type==1); %Fouls for type 1
rat2=Foul(Type==2); %Fouls for type 2
rat3=Foul(Type==3); %Fouls for type 3
rat4=Foul(Type==4); %Fouls for type 4
disp([mean(rat1) mean(rat2) mean(rat3) mean(rat4)])  %Display mean fouls for each type

% Calculate and display the standard error of the mean for 'Foul' in each type
disp('SE fouls')
% Loops through each type to calculate and store SE of "Foul"
for i=1:4
    SEfoul(i)=std(Foul(Type==i))/sqrt(length(Foul(Type==i)));
end
disp(SEfoul)

% Perform and display the results of Wilcoxon rank sum tests between each pair of types for 'Foul'
disp('Wilcoxon test p-values matrix: Types 1-4 vs 1-4')
% The matrix shows the p-values from comparing distributions of 'Foul' between all pairs of types
% Note: The script includes self-comparisons (e.g., rat1 vs. rat1), which will always yield a p-value of 1 since it's the same group
disp([ranksum(rat1,rat1) ranksum(rat1,rat2) ranksum(rat1,rat3) ranksum(rat1,rat4) 
ranksum(rat2,rat1) ranksum(rat2,rat2) ranksum(rat2,rat3) ranksum(rat2,rat4) 
ranksum(rat3,rat1) ranksum(rat3,rat2) ranksum(rat3,rat3) ranksum(rat3,rat4) 
ranksum(rat4,rat1) ranksum(rat4,rat2) ranksum(rat4,rat3) ranksum(rat4,rat4)])

% Display the mean 'Benefit' values for each type
disp('mean Benefit Type 1 - 4')
% Select and store benefits by type
rat1=Benefit(Type==1); %Benefits for type 1
rat2=Benefit(Type==2); %Benefits for type 2
rat3=Benefit(Type==3); %Benefits for type 3
rat4=Benefit(Type==4); %Benefits for type 4
disp([mean(rat1) mean(rat2) mean(rat3) mean(rat4)]) %Display mean benefits for each type

% Calculate and display the standard error of the mean for 'Benefit' in each type
disp('SE benefit')
% Loops through each type to calculate and store SE of "Benefit"
for i=1:4
    SEBenefit(i)=std(Benefit(Type==i))/sqrt(length(Benefit(Type==i)));
end
disp(SEBenefit)

% Perform and display the results of Wilcoxon rank sum tests between each pair of types for 'Benefit'
disp('Wilcoxon test p-values matrix: Types 1-4 vs 1-4')
% The matrix shows the p-values from comparing distributions of 'Benefit' between all pairs of types
% Note: The script includes self-comparisons (e.g., rat1 vs. rat1), which will always yield a p-value of 1 since it's the same group
disp([ranksum(rat1,rat1) ranksum(rat1,rat2) ranksum(rat1,rat3) ranksum(rat1,rat4) 
ranksum(rat2,rat1) ranksum(rat2,rat2) ranksum(rat2,rat3) ranksum(rat2,rat4) 
ranksum(rat3,rat1) ranksum(rat3,rat2) ranksum(rat3,rat3) ranksum(rat3,rat4) 
ranksum(rat4,rat1) ranksum(rat4,rat2) ranksum(rat4,rat3) ranksum(rat4,rat4)])

disp('end table')

%%REGRESSIONS

% Run the regressions displayed in Table 3
disp('TABLE 3')
%Create table to use in regression
tbl=table(Foul,Benefit,Charged,Assistant,TypeName,Route,RatingExper,apostaseis,hronoi,session,Age, Sex);

%favourite regressions, TABLE 3
mdl=fitlm(tbl,'Charged ~  Assistant + TypeName + apostaseis + hronoi +Age+Sex ')
mdl=fitlm(tbl,'RatingExper ~  Assistant + TypeName + apostaseis + hronoi +Age+Sex ')
